{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "c72d57c3",
   "metadata": {},
   "source": [
    "# IMPORT LIBRARIES"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f861754e",
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import json\n",
    "import os\n",
    "import networkx as nx\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as ticker\n",
    "import matplotlib.colors as mplclr\n",
    "import copy as cp\n",
    "import itertools\n",
    "    \n",
    "    \n",
    "# global matplotlib defaults shared by every figure of the notebook\n",
    "mpl.rcParams.update({\n",
    "    'figure.figsize': (10,10),\n",
    "    'xtick.direction': 'in',\n",
    "    'ytick.direction': 'in',\n",
    "    'mathtext.fontset': 'cm',\n",
    "    'mathtext.rm': 'serif',\n",
    "})\n",
    "\n",
    "print('libraries loaded!')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "17d79d83",
   "metadata": {},
   "source": [
    "# Loading common variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ba8c1a1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# working directory\n",
    "WD = os.getcwd()\n",
    "\n",
    "print('Working Directory is ', WD)\n",
    "\n",
    "\n",
    "# output directory\n",
    "# NOTE(review): WD[:-4] strips the last four characters of the working directory;\n",
    "# presumably the notebook lives in a folder with a 4-character name placed next to\n",
    "# 'results/' -- confirm against the repository layout\n",
    "outdirname = WD[:-4]+'results/'\n",
    "\n",
    "# the folder is created when missing: the saving calls of the notebook\n",
    "# (np.savetxt, plt.savefig) do not create it and would fail otherwise\n",
    "os.makedirs(outdirname, exist_ok=True)\n",
    "\n",
    "print('Output Directory is', outdirname)\n",
    "\n",
    "\n",
    "# RGB custom color definition\n",
    "# (from https://coolors.co/8ecae6-219ebc-023047-ffb703-fb8500)\n",
    "\n",
    "mycustom_colors = {\"mycol1\": \"#8ecae6\",\n",
    "                   \"mycol2\": \"#219ebc\",\n",
    "                   \"mycol3\": \"#023047\",\n",
    "                   \"mycol4\": \"#ffb703\",\n",
    "                   \"mycol5\": \"#fb8500\"}\n",
    "\n",
    "# Alternative palette, kept for reference\n",
    "# (from https://coolors.co/264653-2a9d8f-e9c46a-f4a261-e76f51)\n",
    "\n",
    "#mycustom_colors = {\"mycol1\":\"#264653\",\\\n",
    "#                   \"mycol2\":\"#2a9d8f\",\\\n",
    "#                   \"mycol3\":\"#e9c46a\",\\\n",
    "#                   \"mycol4\":\"#f4a261\",\\\n",
    "#                   \"mycol5\":\"#e76f51\"}\n",
    "\n",
    "\n",
    "# Defining the colors for men, women, unknown\n",
    "\n",
    "mycolor_men = mycustom_colors['mycol3']\n",
    "mycolor_women = mycustom_colors['mycol5']\n",
    "mycolor_unkno = mycustom_colors['mycol1']\n",
    "\n",
    "print('Common variables loaded!')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ada25c46",
   "metadata": {},
   "source": [
    "# Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2db1c8de",
   "metadata": {},
   "outputs": [],
   "source": [
    "def reading_net(lwd, lfname):\n",
    "    \"\"\"Reading the files of nodes/edges and using them to populate the networks\n",
    "\n",
    "       INPUT\n",
    "\n",
    "       lwd --> working directory; the input files are looked up in lwd[:-4]+'data/'\n",
    "       lfname --> prefix of the files to read, i.e. '<lfname>-graph_nodes.json'\n",
    "                  and '<lfname>-graph_edges.json'\n",
    "\n",
    "\n",
    "       OUTPUT\n",
    "\n",
    "       lmyG --> undirected networkx graph; nodes carry the attributes 'name' and\n",
    "                'gender', edges carry the attributes 'weight' and 'etype'\n",
    "\n",
    "       \"\"\"\n",
    "\n",
    "    ldirinname = lwd[:-4]+\"data/\"\n",
    "    lfnamenodes = lfname+'-graph_nodes.json'\n",
    "    lfnameedges = lfname+'-graph_edges.json'\n",
    "\n",
    "    # JSON text is UTF-8: the encoding is stated explicitly so that reading does\n",
    "    # not depend on the default encoding of the platform\n",
    "    with open(ldirinname+lfnamenodes, encoding='utf-8') as jfile:\n",
    "        lndata = json.load(jfile)\n",
    "\n",
    "    with open(ldirinname+lfnameedges, encoding='utf-8') as jfile:\n",
    "        ledata = json.load(jfile)\n",
    "\n",
    "    # creating the graph\n",
    "    print('\\tCreating graph %s ...' %(lfname))\n",
    "    lmyG = nx.Graph()\n",
    "\n",
    "    # node file: {node id: {'name': ..., 'gender': ...}}, the ids are cast to int\n",
    "    for k,v in lndata.items():\n",
    "        lmyG.add_node(int(k), name=v['name'], gender=v['gender'])\n",
    "\n",
    "    print('\\tThe network has %d nodes' %(lmyG.number_of_nodes()))\n",
    "\n",
    "    # edge file: only the values are used, each one being\n",
    "    # [id from, id to, {'weight': ..., 'etype': ...}]\n",
    "    for v in ledata.values():\n",
    "        lidfrom = int(v[0])\n",
    "        lidto = int(v[1])\n",
    "        lmyG.add_edge(lidfrom, lidto, weight=v[2]['weight'], etype=v[2]['etype'])\n",
    "\n",
    "    print('\\tThe network has %d edges' %(lmyG.number_of_edges()))\n",
    "\n",
    "\n",
    "    return lmyG\n",
    "    \n",
    "\n",
    "def jaccard_edgesets(lG1, lG2):\n",
    "    \"\"\"Computing the Jaccard score of the edge sets\n",
    "\n",
    "       INPUT\n",
    "\n",
    "       lG1, lG2 --> graphs exposing an edges() iterator of node pairs\n",
    "\n",
    "\n",
    "       OUTPUT\n",
    "\n",
    "       ljaccard --> size of the intersection of the two edge sets divided by the\n",
    "                    size of their union (float); 0.0 when both graphs have no edges\n",
    "\n",
    "       \"\"\"\n",
    "\n",
    "    def _both_orientations(lG):\n",
    "        # the same undirected edge may be reported as (u,v) by one graph and as\n",
    "        # (v,u) by the other: storing both orientations makes the sets comparable\n",
    "        lpairs = set()\n",
    "        for e in lG.edges():\n",
    "            lpairs.add((e[0],e[1]))\n",
    "            lpairs.add((e[1],e[0]))\n",
    "        return lpairs\n",
    "\n",
    "    leset1 = _both_orientations(lG1)\n",
    "    leset2 = _both_orientations(lG2)\n",
    "\n",
    "    lunisize = float(len(leset1.union(leset2)))\n",
    "\n",
    "    # two graphs without edges: the ratio would be 0/0, no overlap is reported\n",
    "    if lunisize == 0.:\n",
    "        return 0.\n",
    "\n",
    "    lintsize = float(len(leset1.intersection(leset2)))\n",
    "\n",
    "    ljaccard = lintsize/lunisize\n",
    "\n",
    "    return ljaccard\n",
    "\n",
    "\n",
    "def component_analysis(lmyG):\n",
    "    \"\"\"Analyzes the connected components of the network\n",
    "\n",
    "       INPUT\n",
    "\n",
    "       lmyG --> undirected networkx graph\n",
    "\n",
    "\n",
    "       OUTPUT\n",
    "\n",
    "       lnrcompons --> number of connected components\n",
    "       lnr_isol_nodes --> number of components made of a single node\n",
    "       lsize_giant --> fraction of the nodes belonging to the largest component\n",
    "                       (0.0 for a graph without nodes)\n",
    "\n",
    "       \"\"\"\n",
    "\n",
    "    # sizes of the components, computed once and sorted from the largest\n",
    "    lcompons = sorted((len(c) for c in nx.connected_components(lmyG)), reverse=True)\n",
    "    lnrcompons = len(lcompons)\n",
    "\n",
    "    # a graph without nodes has no components: nothing to measure\n",
    "    if lnrcompons == 0:\n",
    "        return 0, 0, 0.\n",
    "\n",
    "    # giant component (first entry of the sorted sizes)\n",
    "    lsize_giant = float(lcompons[0])/float(lmyG.number_of_nodes())\n",
    "\n",
    "    lnr_isol_nodes = lcompons.count(1)\n",
    "\n",
    "    # return nr components | nr isolated nodes | size giant\n",
    "    return lnrcompons, lnr_isol_nodes, lsize_giant\n",
    "    \n",
    "\n",
    "# Declaring a function to extract the top-n list of authors according\n",
    "# to a given centrality\n",
    "\n",
    "def extract_top(lnrtop, lcentr_dict, lG):\n",
    "    \"\"\"Ranking the authors by centrality and keeping the best male & female ones\n",
    "\n",
    "       INPUT\n",
    "\n",
    "       lnrtop --> maximum length of each returned list (eg. 100)\n",
    "       lcentr_dict --> dictionary of (node_id, centrality) pairs\n",
    "       lG --> graph whose node attributes 'name' and 'gender' are looked up\n",
    "\n",
    "\n",
    "       OUTPUT\n",
    "\n",
    "       lm_top --> list of (name, centrality) of the top male authors\n",
    "       lf_top --> list of (name, centrality) of the top female authors\n",
    "\n",
    "       Both lists are in decreasing order of centrality and are shorter than\n",
    "       lnrtop when the ranking holds fewer authors of that gender.\n",
    "\n",
    "       \"\"\"\n",
    "\n",
    "    lm_top, lf_top = [], []\n",
    "\n",
    "    # decreasing centrality\n",
    "    lranking = sorted(lcentr_dict.items(), key=lambda kv: kv[1], reverse=True)\n",
    "\n",
    "    for lnode, lscore in lranking:\n",
    "        lgender = lG.nodes[lnode]['gender']\n",
    "        lname = lG.nodes[lnode]['name']\n",
    "\n",
    "        # choosing the list to feed; any other gender value is skipped\n",
    "        if lgender == 'male':\n",
    "            ltarget = lm_top\n",
    "        elif lgender == 'female':\n",
    "            ltarget = lf_top\n",
    "        else:\n",
    "            ltarget = None\n",
    "\n",
    "        if (ltarget is not None) and (len(ltarget) < lnrtop):\n",
    "            ltarget.append((lname,lscore))\n",
    "\n",
    "        # both lists are complete: the rest of the ranking is not needed\n",
    "        if not ((len(lm_top) != lnrtop) or (len(lf_top) != lnrtop)):\n",
    "            break\n",
    "\n",
    "    return lm_top, lf_top\n",
    "\n",
    "\n",
    "def swap_edges(lG,e1,e2):\n",
    "    \"\"\"Trying to exchange the end points of two edges, in place\n",
    "\n",
    "       The two possible rewirings are attempted in a fixed order; the first one\n",
    "       creating neither a self-loop nor an already existing edge is applied.\n",
    "\n",
    "       INPUT\n",
    "\n",
    "       lG --> graph to modify\n",
    "       e1, e2 --> the two edges, given as pairs of nodes\n",
    "\n",
    "\n",
    "       OUTPUT\n",
    "\n",
    "       1 if a swap has been performed, 0 if neither rewiring is possible\n",
    "\n",
    "       \"\"\"\n",
    "\n",
    "    a1, a2 = e1[0], e1[1]\n",
    "    b1, b2 = e2[0], e2[1]\n",
    "\n",
    "    # candidate rewirings, in order of preference:\n",
    "    #   (a1,a2),(b1,b2) --> (a1,b2),(a2,b1)\n",
    "    #   (a1,a2),(b1,b2) --> (a1,b1),(a2,b2)\n",
    "    # p1 is the new partner of a1 and p2 the new partner of a2\n",
    "    for p1, p2 in ((b2, b1), (b1, b2)):\n",
    "\n",
    "        # the rewiring would create a self-loop\n",
    "        if (a1 == p1) or (a2 == p2):\n",
    "            continue\n",
    "\n",
    "        # the rewiring would duplicate an existing edge\n",
    "        if lG.has_edge(a1,p1) or lG.has_edge(a2,p2):\n",
    "            continue\n",
    "\n",
    "        lG.remove_edge(a1,a2)\n",
    "        lG.remove_edge(b1,b2)\n",
    "\n",
    "        # the new edges are added without any attribute\n",
    "        lG.add_edges_from([(a1,p1),(a2,p2)])\n",
    "\n",
    "        return 1\n",
    "\n",
    "    # if the swap is not possible, return 0\n",
    "    return 0\n",
    "    \n",
    "\n",
    "print('Functions loaded!')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a7c2e8d5",
   "metadata": {},
   "source": [
    "# Reading Networks"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15deadd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Reading the networks')\n",
    "\n",
    "# one graph per kind of relation, read in this order:\n",
    "#   cb  --> collective bodies\n",
    "#   ev  --> events\n",
    "#   pub --> publication\n",
    "#   cor --> correspondence\n",
    "#   rel --> personal relationships\n",
    "cbG, evG, pbG, coG, reG = [reading_net(WD, prefix)\n",
    "                           for prefix in ('cb', 'ev', 'pub', 'cor', 'rel')]\n",
    "\n",
    "# defining some common variables\n",
    "# (graph_list[i] is the network labelled label_list[i])\n",
    "\n",
    "label_list = ['REL', 'COR', 'CB', 'PUB', 'EV']\n",
    "graph_list = [reG, coG, cbG, pbG, evG]\n",
    "\n",
    "nr_graphs = len(graph_list)\n",
    "\n",
    "\n",
    "print('End of reading')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "dc8b9ffe",
   "metadata": {},
   "source": [
    "## Computing nr of men, women, other"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "adeda97d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# tallying the 'gender' attribute over the nodes of the collective bodies graph;\n",
    "# every value different from 'male' and 'female' is counted as unknown\n",
    "gender_list = [attrs['gender'] for _, attrs in cbG.nodes(data=True)]\n",
    "\n",
    "nrmen = gender_list.count('male')\n",
    "nrwomen = gender_list.count('female')\n",
    "nrunkow = len(gender_list) - nrmen - nrwomen\n",
    "\n",
    "print('Of %d nodes, we have %d men, %d women and %d unknown (sum = %d)'\n",
    "      %(cbG.number_of_nodes(), nrmen, nrwomen, nrunkow, nrmen+nrwomen+nrunkow))\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "id": "62de2bc5",
   "metadata": {},
   "source": [
    "## Computing edge overlap (via Jaccard score)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3b57828c",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Computing the edge overlap ...')\n",
    "\n",
    "# symmetric matrix of the pairwise scores; the diagonal keeps the initial value 1\n",
    "jacmatrix = np.ones((nr_graphs,nr_graphs))\n",
    "\n",
    "# every unordered pair of networks is visited once (i < j)\n",
    "for i, j in itertools.combinations(range(nr_graphs), 2):\n",
    "    myG1, myl1 = graph_list[i], label_list[i]\n",
    "    myG2, myl2 = graph_list[j], label_list[j]\n",
    "\n",
    "    myjac = jaccard_edgesets(myG1, myG2)\n",
    "\n",
    "    jacmatrix[i,j] = jacmatrix[j,i] = myjac\n",
    "\n",
    "    print('\\tThe Jaccard between %s and %s is %.4f' %(myl1, myl2, myjac))\n",
    "\n",
    "# saving results on file\n",
    "fnameout = 'graphs-edge-jaccard_data.dat'\n",
    "print('Saving file %s ...\\n' %(fnameout))\n",
    "\n",
    "np.savetxt(outdirname+fnameout, jacmatrix, fmt='%.4f')\n",
    "\n",
    "print('done')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3be79878",
   "metadata": {},
   "outputs": [],
   "source": [
    "# displaying the matrix of the pairwise Jaccard scores between the networks\n",
    "jacmatrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f0d8d64b",
   "metadata": {},
   "source": [
    "## Computing components in the network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5fa20b01",
   "metadata": {},
   "outputs": [],
   "source": [
    "# printed per network: nr nodes | nr components | nr isolated nodes | size giant\n",
    "for i, (myG, mylabel) in enumerate(zip(graph_list, label_list)):\n",
    "\n",
    "    nrcomps, nr_isol_nod, size_giant = component_analysis(myG)\n",
    "\n",
    "    print('For graph %s we have:' %(mylabel))\n",
    "    print('\\t%d  %d  %d  %.4f\\n' %(myG.number_of_nodes(), nrcomps, nr_isol_nod, size_giant))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "1f5f901e",
   "metadata": {},
   "source": [
    "## Analysis of the degrees"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e992ec2b",
   "metadata": {},
   "outputs": [],
   "source": [
    "for i, (myG, mylabel) in enumerate(zip(graph_list, label_list)):\n",
    "\n",
    "    # myG.degree() yields (node, degree) pairs: only the degrees are kept\n",
    "    degs = [deg for _, deg in myG.degree()]\n",
    "    mindeg, maxdeg = min(degs), max(degs)\n",
    "\n",
    "    print('For network %s mindeg = %d  and  maxdeg = %d' %(mylabel, mindeg, maxdeg))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "76041dc7",
   "metadata": {},
   "source": [
    "# Merging networks into one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "807b1034",
   "metadata": {},
   "outputs": [],
   "source": [
    "print('Creating merged network')\n",
    "\n",
    "allG = nx.Graph()\n",
    "\n",
    "print('Adding nodes ...')\n",
    "\n",
    "# the nodes, with their attributes, are taken from the collective bodies graph\n",
    "allG.add_nodes_from(cbG.nodes(data=True))\n",
    "\n",
    "# iterating over networks\n",
    "\n",
    "for i, (myG, mylabel) in enumerate(zip(graph_list, label_list)):\n",
    "\n",
    "    print('\\tAdding edges of graph %s ...' %(mylabel))\n",
    "\n",
    "    for idfrom, idto, eattrs in myG.edges(data=True):\n",
    "        w = eattrs['weight']\n",
    "\n",
    "        # an edge found in several networks is stored once, summing the weights\n",
    "        if not allG.has_edge(idfrom,idto):\n",
    "            allG.add_edge(idfrom,idto,weight=w)\n",
    "        else:\n",
    "            allG[idfrom][idto]['weight'] += w\n",
    "\n",
    "# self-loops are dropped from the merged network\n",
    "allG.remove_edges_from(nx.selfloop_edges(allG))\n",
    "\n",
    "print('There are %d edges ' %(allG.number_of_edges()))\n",
    "\n",
    "# maximum degree\n",
    "\n",
    "print('\\nThe maximum degree is %d ' %(max(deg for _, deg in allG.degree())))\n",
    "\n",
    "# analysis of the components\n",
    "# (printed: nr nodes | nr components | nr isolated nodes | size giant)\n",
    "\n",
    "nrcomps, nr_isol_nod, size_giant = component_analysis(allG)\n",
    "\n",
    "print('Components analisys:')\n",
    "print('\\t%d  %d  %d  %.4f\\n' %(allG.number_of_nodes(), nrcomps, nr_isol_nod, size_giant))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8bc5f176",
   "metadata": {},
   "source": [
    "## k-core Decomposition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "13f324bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "nrtop = 50\n",
    "\n",
    "print('Performing the k-core decomposition ...')\n",
    "\n",
    "nx_coreness = nx.core_number(allG)\n",
    "\n",
    "# extracting the n-top inner k-core\n",
    "\n",
    "m_kc_ntop, f_kc_ntop = extract_top(nrtop, nx_coreness, allG)\n",
    "\n",
    "# extracting the degeneracy (largest shell index)\n",
    "\n",
    "degen = max(nx_coreness.values())\n",
    "\n",
    "print('The degeneracy of the whole network is %d \\n'%(degen))\n",
    "\n",
    "# layout of ks_gender_density (one row per shell index, complete network only):\n",
    "#   column 0       --> shell index k_s\n",
    "#   columns 1 to 3 --> men | women | unknown inside the shell (\"raw\" densities)\n",
    "#   columns 4 to 6 --> men | women | unknown in the shells >= k_s (cumulative values)\n",
    "\n",
    "ks_gender_density = np.zeros((degen+1,7))\n",
    "ks_gender_density[:,0] = np.arange(degen+1)\n",
    "\n",
    "# counting the nodes of each gender per shell\n",
    "for k, ks in nx_coreness.items():\n",
    "    gend = allG.nodes[k]['gender']\n",
    "\n",
    "    col = 1 if gend == 'male' else (2 if gend == 'female' else 3)\n",
    "    ks_gender_density[ks,col] += 1.\n",
    "\n",
    "\n",
    "# populating the cumulative part (still counts at this point)\n",
    "for i in range(degen+1):\n",
    "    ks_gender_density[i,4:7] = ks_gender_density[i:,1:4].sum(axis=0)\n",
    "\n",
    "\n",
    "# normalization (each row is handled on its own)\n",
    "\n",
    "for row in ks_gender_density:\n",
    "    norm = row[1:4].sum()     # nr of nodes inside the shell\n",
    "    norm2 = row[4:7].sum()    # nr of nodes in the shell and in the inner ones\n",
    "\n",
    "    row[4:7] /= norm2\n",
    "\n",
    "    # empty shells are left at zero\n",
    "    if norm > 0:\n",
    "        row[1:4] /= norm\n",
    "\n",
    "# saving on file\n",
    "# (columns: progressive index | k_s | men | women | unknown; empty shells are skipped)\n",
    "\n",
    "dummy = list()\n",
    "count = 0\n",
    "\n",
    "for row in ks_gender_density:\n",
    "    if sum(row[1:4]) > 0.:\n",
    "        dummy.append([count, int(row[0]), row[1], row[2], row[3]])\n",
    "        count += 1\n",
    "\n",
    "fnameout = 'k_core-gender-data.dat'\n",
    "myfmt = '%d %d %.8e %.8e %.8e'\n",
    "print('Saving file %s ...\\n' %(fnameout))\n",
    "\n",
    "np.savetxt(outdirname+fnameout, np.asarray(dummy), fmt=myfmt)\n",
    "\n",
    "\n",
    "print('End of k-core analysis')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b0ae050e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# coreness on the merged network; names and genders are read from the\n",
    "# collective bodies graph\n",
    "nx_coreness = nx.core_number(allG)\n",
    "genders = nx.get_node_attributes(cbG, \"gender\")\n",
    "names = nx.get_node_attributes(cbG, \"name\")\n",
    "\n",
    "# (name, coreness) of every female author, in decreasing order of coreness\n",
    "mylist = sorted(((names[k], v) for k, v in nx_coreness.items() if genders[k] == 'female'),\n",
    "                key=lambda tup: tup[1], reverse=True)\n",
    "\n",
    "for name, core in mylist:\n",
    "    print(name,' | ', core)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b4ad8c98",
   "metadata": {},
   "outputs": [],
   "source": [
    "# printing the two rankings side by side: male author + coreness | female author + coreness.\n",
    "# extract_top returns fewer than nrtop entries when a gender has fewer authors, so the\n",
    "# rows follow the lists themselves (the shorter one is padded with '-') instead of nrtop\n",
    "for m_entry, f_entry in itertools.zip_longest(m_kc_ntop, f_kc_ntop):\n",
    "    m_txt = '%s %d' %(m_entry[0], m_entry[1]) if m_entry is not None else '-'\n",
    "    f_txt = '%s %d' %(f_entry[0], f_entry[1]) if f_entry is not None else '-'\n",
    "\n",
    "    print('%s\\t|\\t%s' %(m_txt, f_txt))\n",
    "\n",
    "print('\\n\\n')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "01c37a29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# displaying the per-shell gender table (one row per shell index)\n",
    "ks_gender_density"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0467374d",
   "metadata": {},
   "source": [
    "### Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6ae5e643",
   "metadata": {},
   "outputs": [],
   "source": [
    "#### PLOTTING THE RESULTS ####\n",
    "\n",
    "# setting common variables\n",
    "\n",
    "barwidth = 1.\n",
    "\n",
    "### SINGLE PANEL (GENDER FRACTION PER K-SHELL)\n",
    "# figure and axes are created together, so that exactly one axes exists\n",
    "\n",
    "fig, ax1 = plt.subplots(1, 1, figsize=(10,10))\n",
    "\n",
    "\n",
    "# extracting the data: one bar per non-empty shell, in increasing order of k_s\n",
    "\n",
    "labelx = list()\n",
    "valx = list()\n",
    "valy_m = list()\n",
    "valy_f = list()\n",
    "valy_o = list()\n",
    "\n",
    "count = 0\n",
    "\n",
    "for i in range(len(ks_gender_density)):\n",
    "    mysum = sum(ks_gender_density[i,1:4])\n",
    "\n",
    "    if mysum > 0.:\n",
    "        valx.append(count)\n",
    "        labelx.append(repr(int(ks_gender_density[i][0])))\n",
    "        valy_m.append(ks_gender_density[i][1])\n",
    "        valy_f.append(ks_gender_density[i][2])\n",
    "        valy_o.append(ks_gender_density[i][3])\n",
    "        count += 1\n",
    "\n",
    "# plotting (stacked bars: unknown at the bottom, then men, then women)\n",
    "\n",
    "ax1.bar(valx, valy_o, width=barwidth, label='Unknown', color=mycolor_unkno)\n",
    "ax1.bar(valx, valy_m, width=barwidth, label='Men', color=mycolor_men,\n",
    "        bottom=valy_o)\n",
    "ax1.bar(valx, valy_f, width=barwidth, label='Women', color=mycolor_women,\n",
    "        bottom=np.add(valy_o, valy_m))\n",
    "\n",
    "\n",
    "## setting axes features\n",
    "\n",
    "ax1.set_ylim([0.,1.0])\n",
    "ax1.set_xlim([-0.5*barwidth,len(valx)-(0.5*barwidth)])\n",
    "\n",
    "# only the first and the last shell are labelled on the x axis\n",
    "xticksval = [valx[0],valx[-1]]\n",
    "xtickslab = [r'$\\;\\;\\;(k_s = '+labelx[0]+')$',r'$\\!\\!\\!\\!\\!(k_s = '+labelx[-1]+')$']\n",
    "\n",
    "ax1.set_xticks(xticksval)\n",
    "ax1.set_xticklabels(xtickslab)\n",
    "\n",
    "ax1.tick_params(axis='y', which='major', labelsize=25)\n",
    "ax1.tick_params(axis='x', which='major', labelsize=30)\n",
    "ax1.tick_params(axis='x', pad=40)\n",
    "ax1.tick_params(axis='y', pad=10)\n",
    "\n",
    "ax1.set_ylabel('Fraction of authors', fontsize=28, labelpad=10)\n",
    "\n",
    "# textboxes and arrow below the x axis (from the outer to the inner shells)\n",
    "\n",
    "ax1.text(0.05, -0.025, 'outer', transform=ax1.transAxes, fontsize=25, va='top', ha='center')\n",
    "ax1.text(0.95, -0.025, 'inner', transform=ax1.transAxes, fontsize=25, va='top', ha='center')\n",
    "\n",
    "ax1.annotate('', xy=(0.85, -0.05), xycoords='axes fraction', xytext=(0.15, -0.05),\n",
    "             arrowprops=dict(width=0.15, color='k', linewidth=3.5))\n",
    "\n",
    "\n",
    "ax1.spines['right'].set_visible(False)\n",
    "ax1.spines['top'].set_visible(False)\n",
    "\n",
    "\n",
    "# legend (above the axes)\n",
    "\n",
    "ax1.legend(loc=(0.03, 1.01),fontsize=22, ncol=3, frameon=False)\n",
    "\n",
    "diroutname = WD[:-4]+'results/'\n",
    "fignameout = 'k_core-gender.pdf'\n",
    "\n",
    "plt.savefig(diroutname+fignameout, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0c9379b0",
   "metadata": {},
   "source": [
    "# Creation of randomized counterpart"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1b3f6b3f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "nr_replicas = 50\n",
    "nr_swaps = 10000\n",
    "rand_seed = 42    # fixed seed: the randomized replicas are the same at every run\n",
    "maxdeg = max([d for k, d in allG.degree()])\n",
    "\n",
    "# local random generator, the global numpy random state is left untouched\n",
    "rng = np.random.default_rng(rand_seed)\n",
    "\n",
    "# accumulator over all the replicas (one row per shell index):\n",
    "#   k_s | men | women | unknown\n",
    "# the coreness of a node cannot exceed its degree, hence maxdeg+1 rows are enough\n",
    "ks_gender_density_rand = np.zeros((maxdeg+1,4))\n",
    "ks_gender_density_rand[:,0] = np.arange(maxdeg+1)\n",
    "\n",
    "\n",
    "for irep in range(nr_replicas):\n",
    "    print('Iteration nr. ', irep+1)\n",
    "    myG = allG.copy()\n",
    "\n",
    "    iswap = 0\n",
    "    elist = list(myG.edges())\n",
    "\n",
    "    while iswap < nr_swaps:\n",
    "        # drawing two positions of the edge list (the same edge may be drawn twice,\n",
    "        # in which case swap_edges refuses the swap)\n",
    "        selects = rng.choice(len(elist), 2)\n",
    "        edgs = [elist[i] for i in selects]\n",
    "\n",
    "        if swap_edges(myG,edgs[0],edgs[1]) == 1:\n",
    "            iswap += 1\n",
    "\n",
    "            # the edge set changed: the list of candidates is refreshed\n",
    "            # (after a failed attempt the graph is unchanged and the list is reused)\n",
    "            elist = list(myG.edges())\n",
    "\n",
    "            # progress is printed only when the counter advances, i.e. once per threshold\n",
    "            if ((iswap % 2500) == 0):\n",
    "                print('\\tswap = ', iswap)\n",
    "\n",
    "\n",
    "    # extraction of the k-core\n",
    "    nx_coreness = nx.core_number(myG)\n",
    "\n",
    "    for k,v in nx_coreness.items():\n",
    "        ks = v\n",
    "        gend = myG.nodes[k]['gender']\n",
    "\n",
    "        # updating the counters\n",
    "        if gend == 'male':\n",
    "            ks_gender_density_rand[ks][1] += 1.\n",
    "        elif gend == 'female':\n",
    "            ks_gender_density_rand[ks][2] += 1.\n",
    "        else:\n",
    "            ks_gender_density_rand[ks][3] += 1.\n",
    "\n",
    "\n",
    "# normalization (per shell, over the nodes collected from all the replicas)\n",
    "\n",
    "for i in range(maxdeg+1):\n",
    "    norm = sum(ks_gender_density_rand[i][1:4])\n",
    "\n",
    "    if norm > 0:\n",
    "        for j in range(1,4):\n",
    "            ks_gender_density_rand[i][j] /= norm\n",
    "\n",
    "\n",
    "# saving on file\n",
    "# (columns: progressive index | k_s | men | women | unknown; empty shells are skipped)\n",
    "\n",
    "dummy = list()\n",
    "count = 0\n",
    "\n",
    "for i in range(len(ks_gender_density_rand)):\n",
    "    mysum = sum(ks_gender_density_rand[i,1:4])\n",
    "\n",
    "    if mysum > 0.:\n",
    "        dummy.append([count, int(ks_gender_density_rand[i][0]), ks_gender_density_rand[i][1],\n",
    "                      ks_gender_density_rand[i][2], ks_gender_density_rand[i][3]])\n",
    "        count += 1\n",
    "\n",
    "fnameout = 'k_core-gender-rand_nrep-'+repr(nr_replicas)+'-data.dat'\n",
    "myfmt = '%d %d %.8e %.8e %.8e'\n",
    "print('Saving file %s ...\\n' %(fnameout))\n",
    "\n",
    "np.savetxt(outdirname+fnameout, np.asarray(dummy), fmt=myfmt)\n",
    "\n",
    "\n",
    "print('End of k-core analysis')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ecedc34c",
   "metadata": {},
   "source": [
    "### Visualization"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e1c08c88",
   "metadata": {},
   "outputs": [],
   "source": [
    "#### PLOTTING THE RESULTS ####\n",
    "\n",
    "# setting common variables\n",
    "\n",
    "barwidth = 1.\n",
    "\n",
    "### SINGLE PANEL (GENDER FRACTION PER K-SHELL, RANDOMIZED NETWORKS)\n",
    "# figure and axes are created together, so that exactly one axes exists\n",
    "\n",
    "fig, ax1 = plt.subplots(1, 1, figsize=(10,10))\n",
    "\n",
    "\n",
    "# extracting the data: one bar per non-empty shell, in increasing order of k_s\n",
    "\n",
    "labelx = list()\n",
    "valx = list()\n",
    "valy_m = list()\n",
    "valy_f = list()\n",
    "valy_o = list()\n",
    "\n",
    "count = 0\n",
    "\n",
    "for i in range(len(ks_gender_density_rand)):\n",
    "    mysum = sum(ks_gender_density_rand[i,1:4])\n",
    "\n",
    "    if mysum > 0.:\n",
    "        valx.append(count)\n",
    "        labelx.append(repr(int(ks_gender_density_rand[i][0])))\n",
    "        valy_m.append(ks_gender_density_rand[i][1])\n",
    "        valy_f.append(ks_gender_density_rand[i][2])\n",
    "        valy_o.append(ks_gender_density_rand[i][3])\n",
    "        count += 1\n",
    "\n",
    "# plotting (stacked bars: unknown at the bottom, then men, then women)\n",
    "\n",
    "ax1.bar(valx, valy_o, width=barwidth, label='Unknown', color=mycolor_unkno)\n",
    "ax1.bar(valx, valy_m, width=barwidth, label='Men', color=mycolor_men,\n",
    "        bottom=valy_o)\n",
    "ax1.bar(valx, valy_f, width=barwidth, label='Women', color=mycolor_women,\n",
    "        bottom=np.add(valy_o, valy_m))\n",
    "\n",
    "\n",
    "## setting axes features\n",
    "\n",
    "ax1.set_ylim([0.,1.0])\n",
    "ax1.set_xlim([-0.5*barwidth,len(valx)-(0.5*barwidth)])\n",
    "\n",
    "# only the first and the last shell are labelled on the x axis\n",
    "xticksval = [valx[0],valx[-1]]\n",
    "xtickslab = [r'$\\;\\;\\;(k_s = '+labelx[0]+')$',r'$\\!\\!\\!\\!\\!(k_s = '+labelx[-1]+')$']\n",
    "\n",
    "ax1.set_xticks(xticksval)\n",
    "ax1.set_xticklabels(xtickslab)\n",
    "\n",
    "ax1.tick_params(axis='y', which='major', labelsize=25)\n",
    "ax1.tick_params(axis='x', which='major', labelsize=30)\n",
    "ax1.tick_params(axis='x', pad=40)\n",
    "ax1.tick_params(axis='y', pad=10)\n",
    "\n",
    "ax1.set_ylabel('Fraction of authors', fontsize=28, labelpad=10)\n",
    "\n",
    "# textboxes and arrow below the x axis (from the outer to the inner shells)\n",
    "\n",
    "ax1.text(0.05, -0.025, 'outer', transform=ax1.transAxes, fontsize=25, va='top', ha='center')\n",
    "ax1.text(0.95, -0.025, 'inner', transform=ax1.transAxes, fontsize=25, va='top', ha='center')\n",
    "\n",
    "ax1.annotate('', xy=(0.85, -0.05), xycoords='axes fraction', xytext=(0.15, -0.05),\n",
    "             arrowprops=dict(width=0.15, color='k', linewidth=3.5))\n",
    "\n",
    "\n",
    "ax1.spines['right'].set_visible(False)\n",
    "ax1.spines['top'].set_visible(False)\n",
    "\n",
    "\n",
    "# legend (above the axes)\n",
    "\n",
    "ax1.legend(loc=(0.03, 1.01),fontsize=22, ncol=3, frameon=False)\n",
    "\n",
    "diroutname = WD[:-4]+'results/'\n",
    "fignameout = 'k_core-gender-rand.pdf'\n",
    "\n",
    "plt.savefig(diroutname+fignameout, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "59254282",
   "metadata": {},
   "source": [
    "## Visualization of Jaccard matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c001706a",
   "metadata": {},
   "source": [
    "### Functions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5bf67378",
   "metadata": {},
   "outputs": [],
   "source": [
    "def heatmap(data, row_labels, col_labels, ax=None,\n",
    "            cbar_kw=None, cbarlabel=\"\", **kwargs):\n",
    "    \"\"\"\n",
    "    Create a heatmap from a numpy array and two lists of labels.\n",
    "\n",
    "    The entries strictly below the main diagonal are masked before plotting,\n",
    "    so only the diagonal and the upper triangle are drawn.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data\n",
    "        A 2D numpy array of shape (M, N).\n",
    "    row_labels\n",
    "        A list or array of length M with the labels for the rows.\n",
    "    col_labels\n",
    "        A list or array of length N with the labels for the columns.\n",
    "    ax\n",
    "        A `matplotlib.axes.Axes` instance to which the heatmap is plotted.  If\n",
    "        not provided, use current axes or create a new one.  Optional.\n",
    "    cbar_kw\n",
    "        A dictionary with arguments to `matplotlib.Figure.colorbar`.  If None\n",
    "        (the default) no extra argument is passed.  Optional.\n",
    "    cbarlabel\n",
    "        The label for the colorbar.  Optional.\n",
    "    **kwargs\n",
    "        All other arguments are forwarded to `imshow`.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    im, cbar\n",
    "        The image created by `imshow` and the colorbar attached to it.\n",
    "    \"\"\"\n",
    "\n",
    "    if ax is None:\n",
    "        ax = plt.gca()\n",
    "\n",
    "    # None replaces a mutable {} default, which would be shared between calls\n",
    "    if cbar_kw is None:\n",
    "        cbar_kw = {}\n",
    "\n",
    "    # applying mask to array: the mask gets the shape of the data, so that\n",
    "    # non-square arrays are accepted as well\n",
    "    mymask = np.tri(data.shape[0], data.shape[1], k=-1)\n",
    "    data = np.ma.array(data, mask=mymask)\n",
    "\n",
    "    # Plot the heatmap\n",
    "    im = ax.imshow(data, **kwargs)\n",
    "\n",
    "    # Create colorbar\n",
    "    cbar = ax.figure.colorbar(im, ax=ax, **cbar_kw)\n",
    "    cbar.ax.set_ylabel(cbarlabel, rotation=-90, va=\"bottom\")\n",
    "\n",
    "    # Show all ticks and label them with the respective list entries.\n",
    "    ax.set_xticks(np.arange(data.shape[1]), labels=col_labels)\n",
    "    ax.set_yticks(np.arange(data.shape[0]), labels=row_labels)\n",
    "\n",
    "    # Let the horizontal axes labeling appear on top.\n",
    "    ax.tick_params(top=True, bottom=False,\n",
    "                   labeltop=True, labelbottom=False)\n",
    "\n",
    "    # Set the alignment of the tick labels.\n",
    "    plt.setp(ax.get_xticklabels(), ha=\"center\",\n",
    "             rotation_mode=\"anchor\")\n",
    "\n",
    "    # Turn spines off and create white grid.\n",
    "    ax.spines[:].set_visible(False)\n",
    "\n",
    "    # minor ticks sit on the cell borders, the white grid drawn on them separates the cells\n",
    "    ax.set_xticks(np.arange(data.shape[1]+1)-.5, minor=True)\n",
    "    ax.set_yticks(np.arange(data.shape[0]+1)-.5, minor=True)\n",
    "    ax.grid(which=\"minor\", color=\"w\", linestyle='-', linewidth=4)\n",
    "    ax.tick_params(which=\"minor\", bottom=False, left=False)\n",
    "\n",
    "    return im, cbar\n",
    "\n",
    "\n",
    "def annotate_heatmap(im, data=None, valfmt=\"{x:.2f}\",\n",
    "                     textcolors=(\"black\", \"white\"),\n",
    "                     threshold=None, **textkw):\n",
    "    \"\"\"\n",
    "    Annotate the upper triangle and the diagonal of a heatmap.\n",
    "\n",
    "    Cells strictly above the main diagonal are labelled with their own\n",
    "    formatted value, diagonal cells are always labelled with the formatted\n",
    "    constant 1, and cells below the diagonal only get an empty placeholder\n",
    "    text.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    im\n",
    "        The AxesImage to be labeled.\n",
    "    data\n",
    "        Data used to annotate, given as a list or a numpy array (masked\n",
    "        arrays are kept as they are).  Any other value, e.g. None, makes\n",
    "        the function use the image's own data.  Optional.\n",
    "    valfmt\n",
    "        The format of the annotations inside the heatmap.  This should either\n",
    "        use the string format method, e.g. \"$ {x:.2f}\", or be a\n",
    "        `matplotlib.ticker.Formatter`.  Optional.\n",
    "    textcolors\n",
    "        A pair of colors for the cells above the diagonal.  The first is\n",
    "        used when the normalized value is not above the threshold, the\n",
    "        second when it is.  Diagonal labels are always drawn in 'snow'.\n",
    "        Optional.\n",
    "    threshold\n",
    "        Value in data units according to which the colors from textcolors are\n",
    "        applied.  If None (the default) uses half of the normalized maximum\n",
    "        of the data as separation.  Optional.\n",
    "    **textkw\n",
    "        All other arguments are forwarded to each call to `text` that creates\n",
    "        a non-empty label (the empty placeholders do not receive them).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    texts\n",
    "        A list with one `Text` per cell, in row-major order.\n",
    "    \"\"\"\n",
    "\n",
    "    if isinstance(data, (list, np.ndarray)):\n",
    "        # A plain list passes the check above but has neither `.shape` nor\n",
    "        # `.max()`; asanyarray converts it and returns ndarray subclasses\n",
    "        # (such as masked arrays) unchanged.\n",
    "        data = np.asanyarray(data)\n",
    "    else:\n",
    "        data = im.get_array()\n",
    "\n",
    "    # Normalize the threshold to the images color range.\n",
    "    if threshold is not None:\n",
    "        threshold = im.norm(threshold)\n",
    "    else:\n",
    "        threshold = im.norm(data.max())/2.\n",
    "\n",
    "    # Set default alignment to center, but allow it to be\n",
    "    # overwritten by textkw.\n",
    "    kw = dict(horizontalalignment=\"center\",\n",
    "              verticalalignment=\"center\")\n",
    "    kw.update(textkw)\n",
    "\n",
    "    # Get the formatter in case a string is supplied\n",
    "    if isinstance(valfmt, str):\n",
    "        valfmt = mpl.ticker.StrMethodFormatter(valfmt)\n",
    "\n",
    "    # Loop over the data and create a `Text` for each \"pixel\".\n",
    "    texts = []\n",
    "    for i in range(data.shape[0]):\n",
    "        for j in range(data.shape[1]):\n",
    "            if j > i:\n",
    "                # Upper triangle: the text color depends on the cell value.\n",
    "                kw.update(color=textcolors[int(im.norm(data[i, j]) > threshold)])\n",
    "                text = im.axes.text(j, i, valfmt(data[i, j], None), **kw)\n",
    "            elif j == i:\n",
    "                # Diagonal: always show 1, whatever the stored value is.\n",
    "                kw.update(color='snow')\n",
    "                text = im.axes.text(j, i, valfmt(1., None), **kw)\n",
    "            else:\n",
    "                # Lower triangle: empty placeholder, so that the returned\n",
    "                # list still holds one entry per cell.\n",
    "                text = im.axes.text(j, i, '')\n",
    "            texts.append(text)\n",
    "\n",
    "    return texts\n",
    "\n",
    "print('Functions loaded!')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26570c44",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw the Jaccard heatmap of `jacmatrix`, annotate it and save it as a PDF.\n",
    "fig, ax = plt.subplots(figsize=(5,5))\n",
    "\n",
    "im, cbar = heatmap(jacmatrix, label_list, label_list, ax=ax,\n",
    "                   cmap=\"Blues\", cbar_kw={'fraction': 0.0425}, cbarlabel='',\n",
    "                   vmin=0., vmax=0.035)\n",
    "texts = annotate_heatmap(im, threshold=0.015, valfmt=\"{x:.3f}\")\n",
    "\n",
    "ax.tick_params(axis='both',length=0,labelsize=15)\n",
    "\n",
    "cbar.ax.tick_params(axis='y',length=12,labelsize=12,color='white')\n",
    "# Ticks every 0.005 from vmin to vmax.  linspace is used because, with a\n",
    "# float step, whether arange includes the last tick depends on rounding.\n",
    "cbar.ax.set_yticks(np.linspace(0., 0.035, 8))\n",
    "cbar.ax.set_ylabel(r\"Jaccard $J$\", fontsize=18)\n",
    "\n",
    "cbar.outline.set_color('white')\n",
    "cbar.outline.set_linewidth(0.2)\n",
    "cbar.dividers.set_color('white')\n",
    "cbar.dividers.set_linewidth(0.2)\n",
    "\n",
    "# Adjust the layout before saving, so that the saved file and the figure\n",
    "# displayed below use the same layout.\n",
    "fig.tight_layout()\n",
    "\n",
    "diroutname = WD[:-4]+'results/'\n",
    "fignameout = 'graphs-edge-jaccard.pdf'\n",
    "\n",
    "# Make sure the output directory exists before writing into it.\n",
    "os.makedirs(diroutname, exist_ok=True)\n",
    "fig.savefig(diroutname+fignameout, bbox_inches='tight')\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f888cae",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
